# Table A1

suppressPackageStartupMessages({
  library(tidyverse)
  library(haven)
  library(gt)
})

#' Summarise one column of a data frame
#'
#' Computes the `tot`-weighted mean of `variable` together with its
#' unweighted mean, median, and standard deviation.
#'
#' @param data A data frame containing the column named by `variable` and a
#'   weight column named `tot`.
#' @param variable Name of the column to summarise, as a single string.
#' @return A one-row tibble with columns `Variable` (the column name), `Total`
#'   (mean of `variable` weighted by `tot`), `Mean`, `Median`, and `SD`.
#'   Missing values of `variable` are dropped from every statistic; rows with
#'   a missing `tot` are additionally dropped from `Total` only.
summ <- function(data, variable) {
  # Fail early with a readable message instead of weighted.mean()'s
  # length-mismatch error when a column is absent.
  required <- c("tot", if (is.character(variable)) variable)
  absent <- setdiff(required, names(data))
  if (length(absent) > 0) {
    stop(
      "summ(): column(s) not found in `data`: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  values <- data[[variable]]
  weights <- data[["tot"]]

  # weighted.mean(na.rm = TRUE) only strips missing `x`; a missing weight
  # would otherwise turn the whole weighted mean into NA.
  has_weight <- !is.na(weights)

  tibble(
    Variable = variable,
    Total = weighted.mean(values[has_weight], weights[has_weight], na.rm = TRUE),
    Mean = mean(values, na.rm = TRUE),
    Median = median(values, na.rm = TRUE),
    SD = sd(values, na.rm = TRUE)
  )
}

# variables used
# 
# vars <- c(
#   "P1_001N", # total population
#   "P2_001N", # total urban and rural
#   "P2_002N", # total urban
#   "H4_001N", # total home ownership
#   "H4_002N", # total home own mortgage
#   "H4_003N", # total home own free
#   "P3_001N", # total races
#   "P3_002N", # total White
#   "P3_003N", # total Black
#   "P4_001N", # total Hispanic or Latino origin
#   "P4_003N", # total Hispanic or Latino
#   "P5_001N", # total Hispanic or Latino origin by race
#   "P5_010N", # total Hispanic or Latino
#   "P12_001N", # total age
#   "P12_002N", # total male
#   "P12_003N", # total male under 5
#   "P12_004N",
#   "P12_005N",
#   "P12_006N", # total male 15-17
#   "P12_020N", # total male 65+
#   "P12_021N",
#   "P12_022N",
#   "P12_023N",
#   "P12_024N",
#   "P12_025N",
#   "P12_026N", # total female
#   "P12_027N", # total female under 5 (P12_027N-P12_030N together: under 18)
#   "P12_028N",
#   "P12_029N",
#   "P12_030N",
#   "P12_044N", # total female 65+
#   "P12_045N",
#   "P12_046N",
#   "P12_047N",
#   "P12_048N",
#   "P12_049N"
# )
# 
# ## load census data using get_decennial function from tidycensus
# cen <- tidycensus::get_decennial(
#   year = 2020,
#   geography = "county",
#   keep_geo_vars = TRUE,
#   variables = vars,
#   sumfile = "dhc",
#   output = "wide"
# )

# Data ----
# `all_cen` holds every row of the demographics file; `cvr_cen` keeps only the
# rows with a non-empty `county_name` (those used in this paper).
all_cen <- read_dta("tbA1_counties-demographics.dta")
cvr_cen <- all_cen |> filter(county_name != "")


## Columns to summarise (names) and the row labels shown in the table (values).
## NOTE(review): inputs are named prop_* but labelled "Percent" and printed
## with one decimal -- confirm the .dta columns are already on a 0-100 scale.
var_labels <- c(
  prop_white = "Percent White",
  prop_black = "Percent Black",
  prop_hisp = "Percent Hispanic",
  prop_u18 = "Percent Under 18",
  prop_o65 = "Percent Over 65",
  prop_urb = "Percent Urban",
  prop_own_occ = "Percent Homeowning"
)
sum_vars <- names(var_labels)

## One summ() row per variable, stacked into a single table
summarise_vars <- function(data) {
  sum_vars |>
    map(\(v) summ(data, v)) |>
    bind_rows()
}

summary_all <- summarise_vars(all_cen)
summary_sub <- summarise_vars(cvr_cen)

## Put the CVR subset and the full data side by side, then swap the raw column
## names for display labels (names without a label become NA).
sum_stats <- summary_sub |>
  left_join(
    summary_all,
    by = "Variable",
    suffix = c("_OurData", "_National")
  ) |>
  mutate(Variable = unname(var_labels[Variable]))

# Paste only the contents (not longtable header) to appendix ----
# The SD columns are computed above but left out of the printed table.
table_cols <- sum_stats |> select(-matches("SD"))

latex_tbl <- gt(table_cols) |>
  tab_options(table.font.size = px(13)) |>
  fmt_number(matches("_National|_OurData"), decimals = 1) |>
  cols_label(
    "Variable" ~ "",
    ends_with("OurData") ~ "CVR",
    ends_with("National") ~ "Nation"
  ) |>
  tab_spanner("Overall", columns = matches("^Total")) |>
  tab_spanner("Average", columns = matches("^Mean")) |>
  tab_spanner("Median", columns = matches("^Median")) |>
  gt::as_latex()

cat(latex_tbl)

# Row counts: full data vs. the subset used in the paper
nrow(all_cen)
nrow(cvr_cen)

